{
 "cells": [
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "9c48213d-6e6a-4c10-838a-2a7c710c3a05",
   "metadata": {},
   "source": [
    "# HuggingFace LLM - Camel-5b"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "50d3b817-b70e-4667-be4f-d3a0fe4bd119",
   "metadata": {},
   "source": [
    "#### Load documents, build the VectorStoreIndex"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "690a6918-7c75-4f95-9ccc-d2c4a1fe00d7",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:numexpr.utils:Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
      "Note: NumExpr detected 16 cores but \"NUMEXPR_MAX_THREADS\" not set, so enforcing safe limit of 8.\n",
      "INFO:numexpr.utils:NumExpr defaulting to 8 threads.\n",
      "NumExpr defaulting to 8 threads.\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/loganm/miniconda3/envs/gpt_index/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
      "  from .autonotebook import tqdm as notebook_tqdm\n"
     ]
    }
   ],
   "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))\n",
    "\n",
    "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n",
    "from llama_index.llms import HuggingFaceLLM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "90a895ac-2e8b-4d55-97bd-ad614dceda40",
   "metadata": {
    "tags": []
   },
   "outputs": [],
   "source": [
    "# load documents\n",
    "documents = SimpleDirectoryReader(\"../../data/paul_graham\").load_data()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "b1726fff",
   "metadata": {},
   "outputs": [],
   "source": [
    "# setup prompts - specific to StableLM\n",
    "from llama_index.prompts.prompts import SimpleInputPrompt\n",
    "\n",
    "# This will wrap the default prompts that are internal to llama-index\n",
    "# taken from https://huggingface.co/Writer/camel-5b-hf\n",
    "query_wrapper_prompt = SimpleInputPrompt(\n",
    "    \"Below is an instruction that describes a task. \"\n",
    "    \"Write a response that appropriately completes the request.\\n\\n\"\n",
    "    \"### Instruction:\\n{query_str}\\n\\n### Response:\"\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "6a40a357-8a8f-405d-b355-e0caf23bee3c",
   "metadata": {
    "tags": []
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Loading checkpoint shards: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 3/3 [00:43<00:00, 14.34s/it]\n"
     ]
    }
   ],
   "source": [
    "import torch\n",
    "\n",
    "llm = HuggingFaceLLM(\n",
    "    context_window=2048,\n",
    "    max_new_tokens=256,\n",
    "    temperature=0.25,\n",
    "    do_sample=False,\n",
    "    query_wrapper_prompt=query_wrapper_prompt,\n",
    "    tokenizer_name=\"Writer/camel-5b-hf\",\n",
    "    model_name=\"Writer/camel-5b-hf\",\n",
    "    device_map=\"auto\",\n",
    "    tokenizer_kwargs={\"max_length\": 2048},\n",
    "    # uncomment this if using CUDA to reduce memory usage\n",
    "    # model_kwargs={\"torch_dtype\": torch.float16}\n",
    ")\n",
    "service_context = ServiceContext.from_defaults(chunk_size=512, llm=llm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "ad144ee7-96da-4dd6-be00-fd6cf0c78e58",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "> [build_index_from_nodes] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [build_index_from_nodes] Total embedding token usage: 27212 tokens\n",
      "> [build_index_from_nodes] Total embedding token usage: 27212 tokens\n"
     ]
    }
   ],
   "source": [
    "index = VectorStoreIndex.from_documents(documents, service_context=service_context)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "b6caf93b-6345-4c65-a346-a95b0f1746c4",
   "metadata": {},
   "source": [
    "#### Query Index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "85466fdf-93f3-4cb1-a5f9-0056a8245a6f",
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
      "> [retrieve] Total embedding token usage: 8 tokens\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Token indices sequence length is longer than the specified maximum sequence length for this model (954 > 512). Running this sequence through the model will result in indexing errors\n",
      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 1026 tokens\n",
      "> [get_response] Total LLM token usage: 1026 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
      "> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "query_engine = index.as_query_engine()\n",
    "response = query_engine.query(\"What did the author do growing up?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "c42733dd",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The author grew up in a small town in England, attended a prestigious private school, and then went to Cambridge University, where he studied computer science. Afterward, he worked on web infrastructure, wrote essays, and then realized he could write about startups. He then started giving talks, wrote a book, and started interviewing founders for a book on startups.\n"
     ]
    }
   ],
   "source": [
    "print(response)"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "id": "6b282580",
   "metadata": {},
   "source": [
    "#### Query Index - Streaming"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "313544bc",
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine(streaming=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "c1c5ab85-25e4-4460-8b6a-3c119d92ba48",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total LLM token usage: 0 tokens\n",
      "> [retrieve] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [retrieve] Total embedding token usage: 8 tokens\n",
      "> [retrieve] Total embedding token usage: 8 tokens\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total LLM token usage: 0 tokens\n",
      "> [get_response] Total LLM token usage: 0 tokens\n",
      "INFO:llama_index.token_counter.token_counter:> [get_response] Total embedding token usage: 0 tokens\n",
      "> [get_response] Total embedding token usage: 0 tokens\n"
     ]
    }
   ],
   "source": [
    "# set Logging to DEBUG for more detailed outputs\n",
    "response_stream = query_engine.query(\"What did the author do growing up?\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "f8d94603-8e91-451f-8f60-feb0ea0c8c06",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The author grew up in a small town in England, attended a prestigious private school, and then went to Cambridge University, where he studied computer science. Afterward, he worked on web infrastructure, wrote essays, and then realized he could write about startups. He then started giving talks, wrote a book, and started interviewing founders for a book on startups.<|endoftext|>"
     ]
    }
   ],
   "source": [
    "# can be slower to start streaming since llama-index often involves many LLM calls\n",
    "response_stream.print_response_stream()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "b29749ee",
   "metadata": {},
   "outputs": [],
   "source": [
    "# can also get a normal response object\n",
    "response = response_stream.get_response()\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d14c15b3",
   "metadata": {},
   "outputs": [],
   "source": [
    "# can also iterate over the generator yourself\n",
    "generated_text = \"\"\n",
    "for text in response.response_gen:\n",
    "    generated_text += text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "020934ba",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "gpt_index",
   "language": "python",
   "name": "gpt_index"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
